# libraries
library(plyr)
library(tidyverse)
## Warning: Installed Rcpp (0.12.12) different from Rcpp used to build dplyr (0.12.11).
## Please reinstall dplyr to avoid random crashes or undefined behavior.
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## arrange(): dplyr, plyr
## compact(): purrr, plyr
## count(): dplyr, plyr
## failwith(): dplyr, plyr
## filter(): dplyr, stats
## id(): dplyr, plyr
## lag(): dplyr, stats
## mutate(): dplyr, plyr
## rename(): dplyr, plyr
## summarise(): dplyr, plyr
## summarize(): dplyr, plyr
library(ggplot2)
# Read files from one folder and bind them together into one df
data_directory_path = "/local/orion/bigsea/btr_2.0/training_data_1_month"
filenames <- list.files(data_directory_path, pattern="*.csv", full.names=TRUE)
list_of_dataframes <- lapply(filenames, read.csv)
filenames <- NULL
busData <- rbind.fill(list_of_dataframes)
list_of_dataframes <- NULL
Primeiramente, vamos ver as variáveis que existem nos dados:
str(busData)
## 'data.frame': 10472386 obs. of 37 variables:
## $ route : chr "203" "203" "203" "203" ...
## $ tripNumOrig : int 1 1 1 1 1 1 1 1 1 1 ...
## $ shapeId : int 3855 3855 3855 3855 3855 3855 3855 3855 3855 3855 ...
## $ shapeSequence : int 4649120 4649137 4649154 4649184 4649211 4649239 4649258 4649278 4649300 4649329 ...
## $ shapeLatOrig : num -25.5 -25.5 -25.5 -25.5 -25.5 ...
## $ shapeLonOrig : num -49.3 -49.3 -49.3 -49.3 -49.3 ...
## $ distanceTraveledShapeOrig: num 485 847 1256 1778 2243 ...
## $ busCode : Factor w/ 1519 levels "","BA007","BA014",..: 128 128 128 128 128 128 128 128 128 128 ...
## $ gpsPointId : logi NA NA NA NA NA NA ...
## $ gpsLat : num -25.5 -25.5 -25.5 -25.5 -25.5 ...
## $ gpsLon : num -49.3 -49.3 -49.3 -49.3 -49.3 ...
## $ distanceToShapePoint : num 7.75 24.43 2.65 5.63 7.77 ...
## $ timestampOrig : Factor w/ 69396 levels "","04:40:23",..: 193 204 235 267 291 317 336 367 383 405 ...
## $ busStopIdOrig : int 25739 25737 25540 26197 25924 25925 26009 25523 26010 25733 ...
## $ problem : Factor w/ 4 levels "BETWEEN","NO_PROBLEM",..: 2 2 2 2 2 2 1 2 2 2 ...
## $ date : Factor w/ 28 levels "2017-02-01","2017-02-02",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ busStopIdDest : int 25737 25540 26197 25924 25925 26009 25523 26010 25733 25732 ...
## $ timestampDest : Factor w/ 69386 levels "","04:45:17",..: 191 219 251 274 298 317 348 364 384 420 ...
## $ tripNumDest : int 1 1 1 1 1 1 1 1 1 1 ...
## $ shapeLatDest : num -25.5 -25.5 -25.5 -25.5 -25.5 ...
## $ shapeLonDest : num -49.3 -49.3 -49.3 -49.3 -49.3 ...
## $ distanceTraveledShapeDest: num 847 1256 1778 2243 2821 ...
## $ duration : int 41 101 100 85 72 55 94 58 70 92 ...
## $ distance : num 363 409 522 466 578 ...
## $ hourOrig : int 5 5 5 5 5 5 5 5 5 5 ...
## $ hourDest : int 5 5 5 5 5 5 5 5 5 5 ...
## $ isRushOrig : int 0 0 0 0 0 0 0 0 0 0 ...
## $ isRushDest : int 0 0 0 0 0 0 0 0 0 0 ...
## $ periodOrig : Factor w/ 3 levels "afternoon","morning",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ periodDest : Factor w/ 3 levels "afternoon","morning",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ weekDay : Factor w/ 7 levels "Fri","Mon","Sat",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ weekOfYear : int 5 5 5 5 5 5 5 5 5 5 ...
## $ dayOfMonth : int 1 1 1 1 1 1 1 1 1 1 ...
## $ month : int 2 2 2 2 2 2 2 2 2 2 ...
## $ isHoliday : int 0 0 0 0 0 0 0 0 0 0 ...
## $ isWeekend : int 0 0 0 0 0 0 0 0 0 0 ...
## $ isRegularDay : int 1 1 1 1 1 1 1 1 1 1 ...
Podemos ver que existem 37 variáveis, onde temos: * route : rota do ônibus * tripNumOrig : o número da viagem do ônibus na origem * shapeId : o id do shape do ônibus * shapeSequence : o ponto do shape que a viagem está atualmente * shapeLatOrig : a latitude da origem do shape * shapeLonOrig : a longitude da origem do shape * distanceTraveledShapeOrig: a distancia que o ônibus já viajou na origem. * busCode : o código do ônibus * gpsPointId : o id do ponto do gps * gpsLat : a latitude do ponto do gps * gpsLon : a longitude do ponto do gps * distanceToShapePoint : a distância do gps para o ponto do shape casado * timestampOrig : o horário no ponto de origem * busStopIdOrig : a parada do ônibus no ponto de origem * problem : o tipo de problema encontrado pelo BULMA que pode ser NO_PROBLEM, quando não houver problema, NO_SHAPE, quando nenhuma trip foi formada (gps sem shape na base de dados ou com pontos insuficientes para formar trip), TRIP_PROBLEM, quando não foi possível identificar ponto final/ponto inicial, OUTLIER_POINT, quando os pontos excederam o threshold de distância para o shape. * date : a data da viagem * busStopIdDest : o ponto de ônibus do destino * timestampDest : o horário no ponto de destino * tripNumDest : o número da viagem do ônibus no destino * shapeLatDest : a latitude do ponto do shape destino * shapeLonDest : a longitude do ponto do shape destino * distanceTraveledShapeDest: a distancia que o ônibus já viajou no destino * duration : a duração entre a parada origem e destino * distance : a distancia entre a parada origem e destino * hourOrig : a hora de origem * hourDest : a hora de destino * isRushOrig : indicativo se está em horário de pico na origem * isRushDest : indicativo se está me horário de pico no destino * periodOrig : o período na origem (manhã, tarde, noite) * periodDest : o período no destino (manhã, tarde, noite) * weekDay : o dia da semana * weekOfYear : a semana do ano * dayOfMonth : o dia do mês * month : o mês * isHoliday : indicativo se está em mês de férias * isWeekend : indicativo se é final de semana * isRegularDay : indicativo se está entre a terça e quinta inclusive
Análises posssíveis:
analises que queremos:
Agora vamos plotar um gráfico de duração de viagens entre paradas por horas do dia.
p <- ggplot(busData, aes(x = as.factor(hourOrig), y = duration, colour = problem))
p + geom_jitter(aes(alpha = 0.5))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Removed 378758 rows containing missing values (geom_point).
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
Problemas encontrados:
Há um problema de cálculo no preprocessamento quando a parada de origem é a última parada do shape e a parada de destino é a primeira parada do shape. Filtrando esses casos obtemos o gŕafico abaixo.
temp <- busData %>% filter(distanceTraveledShapeDest > 0)
busData <- NULL
p <- ggplot(temp, aes(x = as.factor(hourOrig), y = duration, colour = problem))
p + geom_jitter(aes(alpha = 0.5))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Removed 254489 rows containing missing values (geom_point).
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
Há casos em que o número da viagem do ponto de origem e o número da viagem do ponto de destino não são iguais (nem consecutivos?). Filtrando esses casos obtemos o gŕafico abaixo.
temp2 <- temp %>% filter(abs(tripNumOrig - tripNumDest) == 0)
temp <- NULL
p <- ggplot(temp2, aes(x = as.factor(hourOrig), y = duration, colour = problem))
p + geom_jitter(aes(alpha = 0.5))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Removed 133982 rows containing missing values (geom_point).
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
Não é interessante que haja casos em que a parada de origem é igual à parada de destino. Vamos filtrar esses casos e visualizar como ficam os dados.
temp3 <- temp2 %>% filter(busStopIdOrig != busStopIdDest)
temp2 <- NULL
p <- ggplot(temp3, aes(x = as.factor(hourOrig), y = duration, colour = problem))
p + geom_jitter(aes(alpha = 0.5))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Removed 76439 rows containing missing values (geom_point).
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
Não mudou muita coisa. Há casos em que simplesmente o ônibus demorou muito para ir de uma parada a outra e não se sabe exatamente o porquê.
Podemos calcular a velocidade média do ônibus para ir de uma parada a outra. O resultado está no gráfico abaixo.
temp3 <- temp3 %>% mutate(velocityKmh = ((distance / duration) * 3.6))
p <- ggplot(temp3, aes(x = "velocidade", y = velocityKmh, colour = problem))
p + geom_jitter(aes(alpha = 0.5))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Removed 76439 rows containing missing values (geom_point).
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
Às vezes a velocidade é infinita (quando a duração é 0). Há muitas velocidades que não fazem sentido para um ônibus. A velocidade máxima para ônibus em trechos urbanos é de 90 km/h.
# Filtrando velocidades irreais (muito altas)
temp4 <- temp3 %>% filter(velocityKmh <= 90)
temp3 <- NULL
# filtrando viagens sem problemas e interpoladas
temp5 <- temp4 %>% filter(problem %in% c("NO_PROBLEM", "BETWEEN"))
temp4 <- NULL
p <- ggplot(temp5, aes(x = "velocidade", y = velocityKmh))
p + geom_jitter(aes(alpha = 0.5, colour = problem))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p + geom_boxplot(aes(colour="red"))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
É possível perceber que cerca de 75% das observações tem velocidade abaixo de 35 km/h, sendo a mediana próxima de 25 km/k.
p <- ggplot(temp5, aes(x = as.factor(hourOrig), y = duration, colour = problem))
p + geom_jitter(aes(alpha = 0.5))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
É notório que existem alguns outliers na visualização acima.
Vamos ver um ponto de corte aceitável para a duração:
percentiles <- data.frame(perc = seq(0, 1, 0.0001))
percentiles <- percentiles %>%
mutate(
value = quantile(temp5$duration, perc),
diff = value - lag(value)
)
ggplot(percentiles, aes(x = perc, y = value)) +
geom_line()
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
Podemos ver que os dados estão bem concentrados até certo ponto, então vejamos as últimas observações para descobrirmos um bom ponto de corte de outliers:
percentiles %>%
arrange(-perc) %>%
head(50)
## perc value diff
## 1 1.0000 53830.000 46136.4735
## 2 0.9999 7693.526 3981.6841
## 3 0.9998 3711.842 761.8424
## 4 0.9997 2950.000 342.5788
## 5 0.9996 2607.421 275.8947
## 6 0.9995 2331.526 262.5265
## 7 0.9994 2069.000 202.0000
## 8 0.9993 1867.000 168.0000
## 9 0.9992 1699.000 111.0523
## 10 0.9991 1587.948 101.9477
## 11 0.9990 1486.000 85.0000
## 12 0.9989 1401.000 70.0000
## 13 0.9988 1331.000 65.0000
## 14 0.9987 1266.000 58.0000
## 15 0.9986 1208.000 51.0000
## 16 0.9985 1157.000 44.0000
## 17 0.9984 1113.000 39.0000
## 18 0.9983 1074.000 32.0000
## 19 0.9982 1042.000 28.0000
## 20 0.9981 1014.000 24.0000
## 21 0.9980 990.000 21.0000
## 22 0.9979 969.000 21.0000
## 23 0.9978 948.000 18.0000
## 24 0.9977 930.000 17.0000
## 25 0.9976 913.000 18.0000
## 26 0.9975 895.000 16.0000
## 27 0.9974 879.000 17.0000
## 28 0.9973 862.000 15.0000
## 29 0.9972 847.000 14.0000
## 30 0.9971 833.000 14.0000
## 31 0.9970 819.000 14.0000
## 32 0.9969 805.000 13.0000
## 33 0.9968 792.000 12.0000
## 34 0.9967 780.000 11.0000
## 35 0.9966 769.000 11.0000
## 36 0.9965 758.000 9.0000
## 37 0.9964 749.000 10.0000
## 38 0.9963 739.000 9.0000
## 39 0.9962 730.000 8.0000
## 40 0.9961 722.000 9.0000
## 41 0.9960 713.000 7.0000
## 42 0.9959 706.000 8.0000
## 43 0.9958 698.000 6.0000
## 44 0.9957 692.000 7.0000
## 45 0.9956 685.000 6.0000
## 46 0.9955 679.000 6.0000
## 47 0.9954 673.000 6.0000
## 48 0.9953 667.000 5.0000
## 49 0.9952 662.000 6.0000
## 50 0.9951 656.000 5.0000
percentiles <- NULL
Podemos ver que a diferença entre os valores dos percentis, os quais aumentam em 0.001 a cada linha, começam a aumentar repentinamente a partir do percentil 0.9984. Com isso, vamos estabelecer que a duração é um outlier quando for maior que 1100 segundos.
Vamos verificar se este ponto de corte de duração faz sentido em relação ao geral:
threshold <- 1100
temp5 %>%
ggplot(aes(y = duration, x = problem, colour=problem)) +
geom_jitter(aes(alpha = 0.5)) +
geom_violin() +
geom_hline(yintercept = threshold)
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
Podemos perceber que no geral, as durações acima do ponto de corte estão bem dispersas, enquanto as que estão abaixo são mais concentradas.
Com isso, vamos filtrar as observações que tem a duração menor que o limiar.
temp6 <- temp5 %>% filter(duration <= threshold)
temp5 <- NULL
p <- ggplot(temp6, aes(x = as.factor(hourOrig), y = duration))
p + geom_jitter(aes(alpha = 0.5, colour = problem))
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p + geom_boxplot()
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
p <- NULL
Vemos que alguns pontos ainda estão dispersos, mas estão bem mais concentrados do que antes do corte.
Além disso, podemos ver que entre as 7 e 18 horas a duração das viagens entre paradas são bastante parecidas, enquanto as demais tem valores mais baixos.
write.csv(x = temp6, file = "dadostratados.csv", row.names = F)
temp6 <- NULL